/*
Goal: to estimate UI saved, which is the difference between UI if the treated were not treated and UI observed among the treated. 
	1)	Aggregate the observed wages and UI of early-treated firms by firm size and 8 bins of income percentiles.
		a.	Length of these data: 5 firm size bins by 8 income bins = 40
	2)	For “non-early” firms, aggregate UI and wages by Firm size and	8 bins of income percentiles
		a.	Reshape long, somehow
		b. 	Calculate the ratio of UI to wages
		c.	Length of these data: 5 firm size bins by 8 income bins = 40
	3)	Merge (1) with the tables of regression outputs, by firm size
		a. multiply wages with 1 / (1 + \beta)
		b.	Length of these data: 5 firm size bins by 8 income bins = 40 (the coefficient varies by firm size only).
	4)	Merge (2) with (3) by firm size and income percentile
		a. multiply the wage product from (3) with the ratio from (2)
		b. Subtract observed UI from the treated from the result of (a). this is UI saved for the treated.
		c.	Length of these data: 5 firm size bins by 8 income bins = 40
	5) 	Aggregate in some way.
*/

do "${dodir}/make_globals.do"

// step 0: merge the distributional data with the 941 data, after making the same sample restrictions as in the regressions

//// start from the same modified 941 data that the regressions use,
//// loading only the variables this script needs
use id q min_emp_2018 min_emp_2019 min_emp_2020q1 naics_2_mode st_mode ///
	med_wage total_compensation num_employees g app_quarter_first app_quarter_second ///
	using ${datadir}/reg_data, clear

//// apply the same sample restrictions and data cleaning decisions

// drop firms whose minimum employment was 0 or 1 in 2018, 2019, or 2020q1
foreach emp of varlist min_emp_2018 min_emp_2019 min_emp_2020q1 {
	drop if `emp'==0 | `emp'==1
}

// drop if two digit naics is missing (coded as 0 or .)
drop if naics_2_mode==0 | naics_2_mode==.

// drop if state is missing (coded as 0 or .)
drop if st_mode==. | st_mode==0

// drop if never treated: only firms with a positive, non-missing g remain
drop if g<=0 | g==.

// drop excluded two-digit NAICS codes: 72 (restaurants; special covid treatment),
// 92 (public admin), 22 (utilities), and additionally 49 and 55
// NOTE(review): 49 and 55 were not mentioned in the original note -- confirm they
// are excluded in the regressions as well
drop if inlist(naics_2_mode, 22, 49, 55, 72, 92)

// clean the compensation variables: missing compensation counts as zero
foreach comp of varlist med_wage total_compensation {
	replace `comp' = 0 if `comp'==.
}

// wages are the larger of med_wage and total_compensation
replace med_wage = max(med_wage, total_compensation)

// average compensation per employee (missing when num_employees is 0 or missing)
generate mean_comp = med_wage/num_employees

// cap wages at 100k per employee: if mean comp exceeds 100k,
// set med_wage to 100000 * employee count
replace med_wage = 100000*num_employees if mean_comp>100000 & mean_comp!=.

// size bin, from the firm's minimum 2018 employment:
//	0: below 10   1: [10,50)   2: [50,100)   3: [100,250)   4: 250 and above
// Stata orders missing values above every number, so an unguarded
// "min_emp_2018>=250" is true when min_emp_2018 is missing and would place
// those firms in the top bin. The !missing() guard leaves their bin missing.
g firm_size_bin=0 if min_emp_2018<10
	replace firm_size_bin=1 if min_emp_2018>=10 & min_emp_2018<50
	replace firm_size_bin=2 if min_emp_2018>=50 & min_emp_2018<100
	replace firm_size_bin=3 if min_emp_2018>=100 & min_emp_2018<250
	replace firm_size_bin=4 if min_emp_2018>=250 & !missing(min_emp_2018)

//// limit to 2018 data
//// NOTE(review): assumes q is a quarter index in which 1-4 are the 2018 quarters -- confirm
drop if q>=5

////// allow the first quarter treated to be the second:
////// take the earlier of the two application quarters (min() skips a missing argument)
generate app_quarter = min(app_quarter_first, app_quarter_second)
format %tq app_quarter

// collapse to one row per firm: total wages over the kept quarters, average
// employee count, and the last non-missing size bin and application quarter
collapse (sum) med_wage (mean) num_employees (lastnm) firm_size_bin app_quarter, by(id)

//// merge with distributional data created in "distributional_data_XX.do"
merge 1:1 id using ${datadir}/distributional_data, update

// diagnostic: total wages and UI by merge status, among rows with a non-missing app_year_first
tabstat totwages totui if app_year_first!=., statistics(sum) by(_merge) format(%20.0fc)

//// limit to rows that carry a firm size bin; rows that came only from the
//// distributional data have none (unless that file supplies firm_size_bin itself)
drop if firm_size_bin==.

drop _merge

// step 1)	Aggregate the observed wages and UI of early-treated firms by firm size and income percentile
//		a.	Length of these data: 5 firm size bins by 8 income bins = 40

// a missing UI count / UI amount is treated as zero whenever the matching
// totcount / totwages is present
replace totuicount = 0 if totuicount ==. & totcount != .
replace totui = 0 if totui ==. & totwages != .
save $datadir/temp_0, replace

// reloading right after the save is a no-op for the data in memory; it lets the
// script be re-run from this point
use $datadir/temp_0, clear

// UI replacement rate; firms with total wages below 20,000 are excluded
gen ui_rep_rate = totui / totwages
	replace ui_rep_rate = . if totwages<20000

keep if ui_rep_rate~=.

// (the unused variable wt = totwages + totui was removed: nothing downstream
// referenced it and every later "keep" list dropped it)

//Early PPP vs. Late PPP
// early = 1 if the application quarter is 2020q2, 0 if it is any other known
// quarter. Without the !missing() guard a missing app_quarter would evaluate to
// early = 0 and the firm would silently join the late-PPP comparison group;
// with it, such firms get early = . and fall out of both "early==1" and "early==0".
gen early = (app_quarter == yq(2020,2)) if !missing(app_quarter)

preserve

	// early PPP recipients only, with just the variables needed for the reshape
	keep if early==1
	keep id w_p* u_p* totui totwages firm_size_bin

	// wide -> long: one row per firm x percentile bin; the numeric suffix of
	// w_p*/u_p* becomes the variable "percentile"
	reshape long w_p u_p, i(id) j(percentile)

	// w_p / u_p are multiplied by the firm totals below, i.e. used as the bin's
	// share of the firm's wages / UI
	rename (w_p u_p) (frac_w frac_ui)

	// wage and UI amounts attributed to each percentile bin
	generate wages_p = frac_w*totwages
	generate ui_p = frac_ui*totui

	// totals (and mean shares) by firm size bin x percentile bin
	collapse (sum) wages_p ui_p (mean) frac*, by(firm_size_bin percentile)
	// alternative kept for reference: aggregate by firm size only
	*collapse (sum) wages_p ui_p (mean) frac*, by(firm_size_bin)
	save ${datadir}/temp_early_treat_firm_size, replace

restore

// Step	2)	For "late" PPP recipient firms, aggregate UI and wages by firm size and 8 bins of income percentiles,
// then calculate the ratio of UI to wages


// late PPP recipients only, with just the variables needed for the reshape
keep if early==0
keep id w_p* u_p* totui totwages firm_size_bin

// wide -> long: one row per firm x percentile bin; the numeric suffix of
// w_p*/u_p* becomes the variable "percentile"
reshape long w_p u_p, i(id) j(percentile)

rename (w_p u_p) (frac_w frac_ui)

// wage and UI amounts attributed to each percentile bin
generate wages_p = frac_w*totwages
generate ui_p = frac_ui*totui

// totals (and mean shares) by firm size bin x percentile bin
collapse (sum) wages_p ui_p (mean) frac*, by(firm_size_bin percentile)
// alternative kept for reference: aggregate by firm size only
*collapse (sum) wages_p ui_p (mean) frac*, by(firm_size_bin)

// UI per dollar of wages in each cell (missing when the cell has zero wages)
generate ratio_ui_wages = ui_p/wages_p

keep firm_size_bin percentile ratio_ui_wages
*keep firm_size_bin ratio_ui_wages

save ${datadir}/temp_late_treat_firm_size_centile, replace


//	Step	3)	Merge (1) with the tables of regression outputs, by firm size
//		a. multiply wages with 1 / (1 + \beta)
//		b. length of resulting data is 40 (firm size bin x percentile)


// stack the regression tables for bins 0 through 5
clear

forvalues bin = 0/5 {
	append using ${datadir}/reg_tables/reg_table_growth_bin`bin'
}

// spread the ATT point estimate and the 95% CI bounds into separate columns;
// each is non-missing only on the row whose stat label matches
generate ATT_coeff = cond(stat=="coeff", ATT, .)
generate ATT_bot_ci = cond(stat=="bot_95", ATT, .)
generate ATT_top_ci = cond(stat=="top_95", ATT, .)

// one row per firm size bin holding the non-missing value of each column
collapse (lastnm) ATT_coeff ATT_bot_ci ATT_top_ci, by(firm_size_bin)

// merge on firm size base: each coefficient row matches all percentile rows of its bin
merge 1:m firm_size_bin using ${datadir}/temp_early_treat_firm_size, update nogen
*merge 1:1 firm_size_bin using ${datadir}/temp_early_treat_firm_size, update nogen

// the firm data only define bins 0-4
// NOTE(review): bin 5 is presumably a pooled/all-firms regression table -- confirm
keep if firm_size_bin!=5

// counterfactual wages had the early-treated firms not been treated
generate wages_nontreat = wages_p/(1+ATT_coeff)

// the CI bounds are not carried forward
keep firm_size_bin percentile wages_p ui_p wages_nontreat
*keep wages_nontreat firm_size_bin ui_p wages_p
save ${datadir}/temp_early_treat_firm_size_coefs, replace


//	Step	4)	Merge (2) with (3) by firm size and income percentile
//		a. multiply the wage product from (3) with the ratio from (2)
//		b. Subtract observed UI from the treated from the result of (a). this is UI saved for the treated.
//		c.	Length of these data: 5 firm size bins by 8 income bins = 40

use ${datadir}/temp_early_treat_firm_size_coefs, clear

// attach the late-recipient UI-to-wage ratio for the same firm size x percentile cell
merge 1:1 firm_size_bin percentile using ${datadir}/temp_late_treat_firm_size_centile, update nogen
*merge 1:1 firm_size_bin using ${datadir}/temp_late_treat_firm_size_centile, update nogen

// counterfactual UI: counterfactual wages times the late-recipient UI/wage ratio
generate ui_nontreat = wages_nontreat*ratio_ui_wages

// UI saved: counterfactual UI minus the UI actually observed for the early treated
generate ui_saved = ui_nontreat - ui_p

save ${datadir}/temp_ui_saved_early_treat, replace

//	Step 5: Aggregate in some way
//	(currently no aggregation: the cell-level table is exported as is)

use ${datadir}/temp_ui_saved_early_treat, clear

export excel using $outdir/distrib/dist_ui.xlsx, firstrow(variables) replace



/*





// What to do with this?

//Add the scaling factors
gen cond_bus_type = businesstype_first
replace cond_bus_type = "non-profit" if inlist(businesstype_first, "501(c)3 â Non Profit", "501(c)19 â Non Profit Veterans","501(c)3 â Non Profit", "501(c)6 â Non Profit Membership", "Non-Profit Childcare Center", "Non-Profit Organization", "Tribal Concerns", "501(c) â Non Profit except 3,4,6,")
replace cond_bus_type = "sole" if inlist(businesstype_first, "Independent Contractors", "Qualified Joint-Venture (spouses)", "Rollover as Business Start-Ups (ROB", "Self-Employed Individuals", "Single Member LLC", "Sole Proprietorship")
replace cond_bus_type = "partnership" if inlist(businesstype_first, "Cooperative", "Housing Co-op", "Joint Venture", "Limited Liability Partnership", "Partnership")
replace cond_bus_type = "c_corp" if inlist(businesstype_first, "Corporation")
replace cond_bus_type = "s_corp" if inlist(businesstype_first, "Subchapter S Corporation")
replace cond_bus_type = "ownership" if inlist(businesstype_first, "Employee Stock Ownership Plan(ESOP)", "Tenant in Common", "Trust")
replace cond_bus_type = "prof_assoc" if inlist(businesstype_first, "Professional Association")
replace cond_bus_type = "other" if inlist(businesstype_first, "")
replace cond_bus_type = "llc" if inlist(businesstype_first, "Limited  Liability Company(LLC)")

merge m:1 cond_bus_type using "${datadir}/scale_ups/w2", nogen

preserve

	gcollapse (sum) totui reduced_ui_w_* reduced_ui_u_* [pw=dollar_w2_wt] if took_ppp == 1
	export excel "${outdir}/distrib/total_dist_ui.xlsx", replace firstrow(variables)

restore

*/
